# Basic vector and matrix construction examples.
# The abandoned console retries (missing `c`, missing comma, `true`/`TURE`
# instead of `TRUE`, a pasted `>` prompt) were removed because they do not
# parse or evaluate.

# Numeric vector; naming it on its own line prints it at the console.
a <- c(1, 2, 5, 3, 6, -2, -4)
a

# 5 x 4 integer matrix filled column by column (the `matrix()` default).
y <- matrix(1:20, nrow = 5, ncol = 4)
y

# 2 x 2 matrix filled row by row, with row and column labels.
cells <- c(1, 26, 24, 68)
rnames <- c("R1", "R2")
cnames <- c("C1", "C2")
# The column labels were originally assigned to `nnames` while `matrix()` was
# given `cnames`; both names are defined so either reference works.
nnames <- cnames
mymatrix <- matrix(
  cells,
  nrow = 2,
  ncol = 2,
  byrow = TRUE,
  dimnames = list(rnames, cnames)
)
# Install the CRAN packages used in this session, skipping any that are
# already installed so the block can be re-run safely.
# Package names must be quoted strings with straight quotes: a bare
# `RColorBrewer` is looked up as an object, and curly quotes do not parse.
cran_packages <- c(
  "Hmisc",
  "RColorBrewer",
  "dplyr",
  "ggplot2",
  "esquisse",
  "ggThemeAssist",
  "ggrepel",
  "VennDiagram",
  "survival"
)
missing_packages <- cran_packages[
  !vapply(cran_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# Smoke test: read one STAR gene-count table from the GDC download into
# `test1`.
#
# The original retries failed for reasons that are fixed here:
#   * "\0" inside an R string is a nul character and is a parse error, so
#     path separators are written as "/" (via file.path()).
#   * "./D:/..." and ".D:/..." are not valid paths; a drive-letter path must
#     not be prefixed with "." or "./".
#   * The file name was guessed with several different endings, so the file
#     is located by pattern instead of being spelled out in full.
# NOTE(review): the download is assumed to live under D:/TCGA (the directory
# passed to setwd() later in this file) -- confirm, since some attempts
# pointed at C:/Users/.../Documents instead.
gdc_sample_dir <- file.path(
  "D:/TCGA",
  "gdc_download_20241211_021101.693288",
  "0be8ee2e-2e57-4b53-8ad8-f1cd4ae45980"
)
gdc_sample_file <- list.files(
  gdc_sample_dir,
  pattern = paste0(
    "^eec21f58-5581-4eb9-8b8e-f839ae145494",
    "\\.rna_seq\\.augmented_star_gene_counts"
  ),
  full.names = TRUE
)
if (length(gdc_sample_file) == 0) {
  stop("No STAR gene-count file found in ", gdc_sample_dir, call. = FALSE)
}
test1 <- data.table::fread(gdc_sample_file[1])
# Load the GDC cart metadata and build `file_sample`, a lookup table from
# count-file name to sample ID, then list the count files on disk.

# Forward slashes: "D:\TCGA" contains the invalid escape "\T" and does not
# parse.
setwd("D:/TCGA")
# install.packages("rjson")
library("rjson")
# The metadata is parsed with jsonlite (called by namespace), not rjson.
json <- jsonlite::fromJSON("metadata.cart.2024-12-11.json")
file_sample0 <- json[c("file_name", "associated_entities")]
# View() needs an interactive session; skip it when run as a script.
if (interactive()) {
  View(file_sample0)
}

# One ID per file: the first cell of each file's associated_entities table.
# NOTE(review): presumably the first column is the submitter/sample barcode
# -- confirm against the metadata JSON.
file_sample0$sample_id <- vapply(
  file_sample0$associated_entities,
  function(x) as.character(x[1, 1]),
  character(1)
)
file_sample <- subset(file_sample0, select = -associated_entities)

# Strip a trailing ".tsv" from every file name. The original indexed
# `file_name[1]`, so every row received the first file's name, and it split
# on the regex '.tsv', in which "." matches any character.
file_sample$file_name <- sub("\\.tsv$", "", file_sample$file_name)
if (interactive()) {
  View(file_sample)
}

# Count files, as paths relative to the download directory.
count_file <- list.files(
  "gdc_download_20241211_021101.693288",
  pattern = "rna_seq\\.augmented_star_gene_counts",
  recursive = TRUE
)
# Bare file names, normalised the same way as file_sample$file_name so the
# two can be compared directly.
count_file_name <- sub("\\.tsv$", "", basename(count_file))
# Build COUNT_Ensembl_matrix: one row per gene_id, one count column per
# sample, by reading every STAR count file and merging on gene_id.
#
# The original ran this loop twice; the second copy built the path without a
# separator between the directory and the file, so only one loop is kept.
gdc_count_dir <- "gdc_download_20241211_021101.693288"
# Compare names with any trailing ".tsv" removed on both sides, so the lookup
# works whether or not the extension was stripped upstream.
lookup_file_name <- sub("\\.tsv$", "", file_sample$file_name)

count_tables <- vector("list", length(count_file))
for (i in seq_along(count_file)) {
  path <- file.path(gdc_count_dir, count_file[i])
  data0 <- read.table(path, fill = TRUE, header = TRUE)
  # Drop the first four rows and keep columns 1 and 4 (gene_id and the
  # unstranded count). For fpkm-unstranded use c(1, 8); for the
  # upper-quartile FPKM use c(1, 9).
  data <- data0[-c(1:4), c(1, 4)]

  hit <- which(lookup_file_name == sub("\\.tsv$", "", count_file_name[i]))
  if (length(hit) != 1L) {
    stop(
      "Expected exactly one metadata row for '", count_file_name[i],
      "' but found ", length(hit),
      call. = FALSE
    )
  }
  colnames(data)[2] <- as.character(file_sample$sample_id[[hit]])[1]
  count_tables[[i]] <- data
}

# Merge once at the end instead of growing the result inside the loop.
COUNT_Ensembl_matrix <- if (length(count_tables) == 0) {
  data.frame()
} else {
  Reduce(function(x, y) merge(x, y, by = "gene_id"), count_tables)
}
# Reload the GDC cart metadata (repeat of the earlier load).
# Forward slashes: "D:\TCGA" contains the invalid escape "\T" and does not
# parse.
setwd("D:/TCGA")
# install.packages("rjson")
library("rjson")
# The metadata is parsed with jsonlite (called by namespace), not rjson.
json <- jsonlite::fromJSON("metadata.cart.2024-12-11.json")
file_sample0 <- json[c("file_name", "associated_entities")]
# View() needs an interactive session; skip it when run as a script.
if (interactive()) {
  View(file_sample0)
}
# Scratch checks of basic types and data-frame construction.
# Removed from the original: the q() calls (they end the R session, so
# nothing after them would run), `rm(list=)` (empty argument), and
# `rm(list=ls())` (wipes every object created earlier in the file).
a <- 1
is.numeric(a)
is.numeric("1")
# `date.frame` was a typo for data.frame().
bar_date <- data.frame(Group = c("A", "B", "C"), value = c(1, 2, 3))
# Plot the survival curves. `col` sets the line colours (rainbow(2) gives two
# colours, one per group of a two-class comparison), `lty` the line type,
# `lwd` the line width, `xscale` rescales the x axis to years, and
# `xlab`/`ylab` are the axis titles.
# `polt` was a typo for plot().
# NOTE(review): `surv_results` is not created anywhere in this file; it must
# already exist in the session. The x label probably means "years from
# sample" -- confirm before changing it.
plot(
  surv_results,
  col = rainbow(2),
  lty = 1,
  lwd = 4,
  xscale = 365,
  xlab = "years form sample",
  ylab = "survival"
)
# One-time environment setup for the TCGA workflow: install whatever is
# missing, then attach the analysis packages.
#
# This replaces five pasted copies of the same install block. Also removed:
#   * `rm(list = ls(all=T))` and `q()`, which wipe the workspace / end the
#     session in the middle of the file.
#   * `install.packages(easyTCGA)`, which passes an unquoted object.
#   * install.packages("easyTCGA") and BiocManager::install("easyTCGA"); the
#     package is installed from GitHub (ayueme/easyTCGA) below.
# requireNamespace() is used as the "is it installed?" test instead of
# require(), which also attaches the package as a side effect.

# Bioconductor mirror.
options(BioC_mirror = "https://mirrors.tuna.tsinghua.edu.cn/bioconductor")

is_installed <- function(pkg) {
  requireNamespace(pkg, quietly = TRUE)
}

# CRAN packages.
cran_pkgs <- c("BiocManager", "survival", "broom", "devtools", "usethis", "cli")
for (pkg in cran_pkgs) {
  if (!is_installed(pkg)) {
    install.packages(pkg)
  }
}

# Bioconductor packages.
bioc_pkgs <- c(
  "TCGAbiolinks",
  "SummarizedExperiment",
  "DESeq2",
  "edgeR",
  "limma"
)
for (pkg in bioc_pkgs) {
  if (!is_installed(pkg)) {
    BiocManager::install(pkg)
  }
}

# GitHub package.
if (!is_installed("easyTCGA")) {
  devtools::install_github("ayueme/easyTCGA")
}

# Attach the analysis packages, which the original did through require()
# and library().
attach_pkgs <- c(bioc_pkgs, "survival", "broom", "easyTCGA")
for (pkg in attach_pkgs) {
  library(pkg, character.only = TRUE)
}
# Download the org.Hs.eg.db source archive and install the package.
# download.file() has no `timeout` argument: the original `timeout=600` was
# absorbed by `...` and ignored. The download timeout is controlled by the
# `timeout` option, so it is raised for this block and restored afterwards.
url <- "https://bioconductor.org/packages/3.20/data/annotation/src/contrib/org.Hs.eg.db_3.20.0.tar.gz"
# NOTE(review): the archive is a .tar.gz saved under a .txt name and is not
# used again in this file -- confirm whether the manual download is needed.
destfile <- "local_file.txt"
old_options <- options(timeout = 600)
tryCatch(
  {
    # mode = "wb" keeps the binary archive intact on Windows.
    download.file(url, destfile, method = "auto", mode = "wb")
    BiocManager::install("org.Hs.eg.db")
  },
  finally = options(old_options)
)
library(GSVA)
library(GSVA)
library(ggplot2)
library(limma)
library(pheatmap)
library(ggsci)
library(clusterProfiler)
library(enrichplot)
library(patchwork)
library(org.Hs.eg.db)
library(tidyverse)
library(msigdbr)
library(GSEABase)
library(GSVA)
# Analysis settings.
exp_File <- "expMatrix.csv"  # expression data: rows are genes, columns samples
target_gene <- "HIF1A"       # gene used to split samples into groups

# The settings below may be changed or left as they are.
adjust.fdr <- 0.05                  # FDR cut-off for differential genes
logFC <- 1                          # log fold-change cut-off
out_all_diff <- "all_diff.txt"      # output: all differential results
out_sig_diff <- "sig_diff.txt"      # output: significant results only
out_volcano <- "volcano_plot.pdf"   # output: volcano plot
GSVA_result <- "GSVA_result.csv"    # output: GSVA scores

# The expression file is not read here: the original read it before the
# working directory was set, and the result was overwritten by the read that
# follows setwd().
# Differential expression between samples with high and low expression of
# `target_gene` (limma), plus a volcano plot and result tables.
setwd("C:/Users/马明福1/Desktop/GEO数据库/GSE161533")

# Analysis settings.
exp_File <- "expMatrix.csv"  # expression data: rows are genes, columns samples
target_gene <- "HIF1A"       # gene used to split samples into groups
adjust.fdr <- 0.05                  # FDR cut-off for differential genes
logFC <- 1                          # log fold-change cut-off
out_all_diff <- "all_diff.txt"      # output: all differential results
out_sig_diff <- "sig_diff.txt"      # output: significant results only
out_volcano <- "volcano_plot.pdf"   # output: volcano plot
GSVA_result <- "GSVA_result.csv"    # output: GSVA scores

# ---- Load the expression matrix ----
rt <- read.table(exp_File, sep = ",", header = TRUE, check.names = FALSE)
# Convert only the value columns. Calling as.matrix() on the whole data frame
# (gene names included) yields a character matrix in which numbers have been
# passed through format(), i.e. cut to 7 significant digits.
data <- as.matrix(rt[, -1, drop = FALSE])
storage.mode(data) <- "double"
rownames(data) <- as.character(rt[[1]])
# Average rows that share a gene name.
data <- avereps(data)
expr <- data

# ---- Group samples by the median of the target gene ----
if (!target_gene %in% rownames(data)) {
  stop(
    "Target gene '", target_gene, "' not found in ", exp_File,
    call. = FALSE
  )
}
target_expr <- data[target_gene, ]
group <- ifelse(target_expr > median(target_expr), "High", "Low")
group <- factor(group, levels = c("High", "Low"))
if (any(table(group) == 0)) {
  stop(
    "Median split of '", target_gene, "' left a group with no samples",
    call. = FALSE
  )
}

# ---- limma model; logFC is High minus Low ----
design <- model.matrix(~ 0 + group)
colnames(design) <- levels(group)
model <- lmFit(data, design)
matrix.data <- makeContrasts(High - Low, levels = design)
model.fit <- contrasts.fit(model, matrix.data)
model.fit <- eBayes(model.fit)
diff <- topTable(model.fit, adjust.method = "fdr", number = nrow(data))

# All results, with an "id" column holding the gene names.
difftab <- cbind(id = rownames(diff), diff)
write.table(
  difftab,
  file = out_all_diff,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE
)

# ---- Classify genes as Up / Down / No ----
sig.gene <- rep("No", nrow(diff))
is_sig <- diff$adj.P.Val <= adjust.fdr
sig.gene[which(is_sig & diff$logFC > logFC)] <- "Up"
sig.gene[which(is_sig & diff$logFC < -logFC)] <- "Down"
print(table(sig.gene))
diff$sig <- sig.gene

max_y <- max(-log10(diff$adj.P.Val))
max_x <- max(diff$logFC)
min_x <- min(diff$logFC)
# Keep the original lower limit but widen it when the most negative logFC
# would otherwise fall outside the axis and be dropped.
x_lower <- min(min_x, -max_x) - 0.3

# ---- Volcano plot ----
pdf(out_volcano, width = 8, height = 6, family = "serif")
p1 <- ggplot(diff, aes(x = logFC, y = -log10(adj.P.Val), colour = sig)) +
  geom_point(alpha = 0.9, size = 2) +
  # Named colours stay correct even when a class has no genes.
  scale_color_manual(values = c(Down = "blue", No = "black", Up = "red")) +
  ylim(c(0, max_y + 2)) +
  xlim(c(x_lower, max_x + 1)) +
  geom_vline(
    xintercept = c(-logFC, logFC),
    linetype = 4,
    colour = "black",
    linewidth = 0.8
  ) +
  geom_hline(
    yintercept = -log10(adjust.fdr),
    linetype = 4,
    colour = "black",
    linewidth = 0.8
  ) +
  labs(x = "log2 Fold Change", y = "-log10FDR") +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    # legend.position.inside only takes effect with legend.position "inside".
    legend.position = "inside",
    legend.position.inside = c(0.1, 0.9),
    legend.background = element_blank(),
    legend.title = element_blank(),
    axis.title = element_text(size = 12),
    legend.text = element_text(size = 12),
    text = element_text(face = "bold")
  )
# print() is required for the plot to be drawn when the file is source()d.
print(p1)
dev.off()

# ---- Significant genes ----
sig.diff.up <- subset(diff, sig == "Up")
sig.diff.down <- subset(diff, sig == "Down")
all.sig.diff <- rbind(sig.diff.up, sig.diff.down)
all.sig.diff.out <- cbind(symbol = rownames(all.sig.diff), all.sig.diff)
# NOTE(review): written as before, without a header line and with row names
# that duplicate the symbol column -- confirm whether a header is wanted.
write.table(
  all.sig.diff.out,
  file = out_sig_diff,
  sep = "\t",
  quote = FALSE,
  col.names = FALSE
)
## Compute GSVA scores ##
# Map the significant gene symbols to Entrez IDs and order by fold change.
gene <- bitr(
  unique(all.sig.diff.out$symbol),
  fromType = "SYMBOL",
  toType = "ENTREZID",
  OrgDb = org.Hs.eg.db
)
diff_gene_table <- merge(
  all.sig.diff.out,
  gene,
  by.x = "symbol",
  by.y = "SYMBOL"
)
data_arrange <- diff_gene_table %>% arrange(desc(logFC))

# MSigDB C2 gene sets as a named list of gene symbols.
msigdbr_collections()
# Add subcategory = "KEGG" (for example) to restrict to one sub-collection.
category_C2 <- msigdbr(species = "Homo sapiens", category = "C2")
colnames(category_C2)
C2_data <- dplyr::select(category_C2, gs_name, gene_symbol, gs_exact_source)
C2_list <- split(C2_data$gene_symbol, C2_data$gs_name)

# `GSVA_result` holds the output file name until it is overwritten with the
# scores below, so remember the path first.
gsva_out_file <- if (is.character(GSVA_result) && length(GSVA_result) == 1) {
  GSVA_result
} else {
  "GSVA_result.csv"
}

if ("gsvaParam" %in% getNamespaceExports("GSVA")) {
  # Parameter-object interface of newer GSVA releases. Runs serially; pass a
  # BiocParallel backend through `BPPARAM` to parallelise.
  gsva_param <- gsvaParam(
    exprData = as.matrix(data),
    geneSets = C2_list,
    minSize = 1,
    maxSize = Inf,
    kcdf = "Gaussian"
  )
  GSVA_result <- gsva(gsva_param)
} else {
  # Classic interface of older GSVA releases.
  GSVA_result <- gsva(
    expr = as.matrix(data),
    gset.idx.list = C2_list,
    method = "gsva",
    kcdf = "Gaussian",
    min.sz = 1,
    max.sz = Inf,
    parallel.sz = 11
  )
}

# These were two statements fused onto one line, which does not parse.
GSVA_result <- as.data.frame(GSVA_result)
GSVA_result_C2 <- cbind(C2 = rownames(GSVA_result), GSVA_result)
write.csv(GSVA_result_C2, file = gsva_out_file, row.names = FALSE)
